## Run main DiD specifcations at ad level, summarize distributions

library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(hms)
library(ggthemes)
library(stargazer)
theme_set(theme_few())

### SET WORKING DIRECTORY HERE ###
path_to_archive <- "replication/"
data_dir <- paste0(path_to_archive, "data/")
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")

### FUNCTION DEFINITIONS FOR WEIGHTING SCHEMES
diff2 <- function(x) x[seq(1,length(x)-1,2)] - x[seq(2,length(x),2)]

inv_var_weight <- function(i,d) {
	weighted.mean(d[i, est], w=sqrt(d[i, n_t] * d[i, n_c]), na.rm=T)
}

total_size_weight <- function(i, d) {
	weighted.mean(d[i,est], w=d[i,n_t] + d[i,n_c], na.rm=T)
}

simple_avg <- function(i,d){
	mean(d[i,est], na.rm=T)
}

### CHOOSE WINDOW LENGTH TO RUN HERE ###
### OPTIONS: 20 (default) / 10 / 5
window_length <- 20
if (window_length == 10) {
	suffix <- "_10min"
} else if (window_length == 5) {
	suffix <- "_5min"
} else {
	suffix <- ""
}

### READ IN PROCESSED DATA AT AD LEVEL ###
### (GENERATED FROM PROPRIETARY DATA BY SCRIPTS IN data_construction FOLDER) ###
dd_wide <- readRDS(paste0(data_dir, "all_dd_data", suffix, ".rds"))


# construct combined C group
s_cols <- grep("s_p", colnames(dd_wide), value=T)
all_controls <- dd_wide[group!="T", map(.SD, sum), .SDcols = s_cols, by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12)] %>%
	.[,group:="C"]

# compute group averages
dd_wide %<>% 
	rbind(all_controls) %>%
	.[,n_c := n_c1 + n_c2 + n_c12] %>%
	.[group=="C", (s_cols) := map(.SD, ~ . / n_c), .SDcols = s_cols] %>% 
	.[group=="C1", (s_cols) := map(.SD, ~ . / n_c1), .SDcols = s_cols] %>% 
	.[group=="C2", (s_cols) := map(.SD, ~ . / n_c2), .SDcols = s_cols] %>% 
	.[group=="C1+C2", (s_cols) := map(.SD, ~ . / n_c12), .SDcols = s_cols] %>% 
	.[group=="T", (s_cols) := map(.SD, ~ . / n_t), .SDcols = s_cols] 


# reshape
dd_long <- dd_wide %>% 
	gather(key="hour", val="view_mean", dplyr::starts_with("s_pre"), dplyr::starts_with("s_post")) %>%
	as.data.table

dd_long[, `:=`(pre_post = sub("s_(pre|post)_(\\d+)", "\\1", hour),
			   hour = sub("s_(pre|post)_(\\d+)", "\\2", hour) %>% as.numeric)]


### FIGURE A.1
### Distribution of treatment and control group sizes

dd_wide[,`:=`(n=ifelse(group=="T", n_t, n_c), Group=recode(group, C="Control", T="Treatment"))]
dd_size_stats <- dd_wide[group %in% c("C","T"),.(Min=min(n), Med=median(n), Max=max(n)),by=.(Group)]
dd_size_stats[,annot:=paste(paste("Min", Min, sep=": "),paste("Median", Med, sep=": "),paste("Max", Max, sep=": "), sep="\n")]

group_size_hist <- ggplot(aes(x=n), data = dd_wide[group %in% c("C","T")]) + 
	geom_histogram(binwidth=10) + 
	xlim(0,1250) +
	labs(x="Number of Devices", y="Ad Count") +  
	geom_label(aes(label=annot), data=dd_size_stats, x=750,y=15000,hjust="left") +
	facet_wrap(~ Group)
ggsave(group_size_hist, filename=paste0(plot_dir, "group_size_hist", suffix, ".pdf"), height=7, width=10)


dd_base <- 
	dd_long[group %in% c("C","T"), .(news_min = sum(view_mean)/60), by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post)] %>%
	setkey(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post) %>%
	.[, .(est = -diff2(diff2(.SD$news_min))) ,by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c)]

# full sample point estimate, bootstrap
pt_est_full <- inv_var_weight(1:nrow(dd_base),dd_base)

set.seed(1850159)
bs_full <- map_dbl(1:1000, ~ inv_var_weight(sample.int(nrow(dd_base), replace=T), dd_base))
results_full <- data.table(replicate = 0:1000, estimate = c(pt_est_full, bs_full))

pt_est_full_simple <- simple_avg(1:nrow(dd_base),dd_base)
set.seed(1850159)
bs_full_simple <- map_dbl(1:1000, ~ simple_avg(sample.int(nrow(dd_base), replace=T), dd_base))
results_full_simple <- data.table(replicate = 0:1000, estimate = c(pt_est_full_simple, bs_full_simple))

pt_est_full_size <- total_size_weight(1:nrow(dd_base),dd_base)
set.seed(1850159)
bs_full_size <- map_dbl(1:1000, ~ total_size_weight(sample.int(nrow(dd_base), replace=T), dd_base))
results_full_size <- data.table(replicate = 0:1000, estimate = c(pt_est_full_size, bs_full_size))


## separately for C1 / C2 / C3
dd_c1 <- 
	dd_long[group %in% c("C1","T"), .(news_min = sum(view_mean)/60), by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post)] %>%
	setkey(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post) %>%
	.[, .(est = -diff2(diff2(.SD$news_min))) ,by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c)]

pt_est_full_c1 <- inv_var_weight(1:nrow(dd_c1),dd_c1)
set.seed(1850159)
bs_full_c1 <- map_dbl(1:1000, ~ inv_var_weight(sample.int(nrow(dd_c1), replace=T), dd_c1))
results_full_c1 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_c1, bs_full_c1))

pt_est_full_simple_c1 <- simple_avg(1:nrow(dd_c1),dd_c1)
set.seed(1850159)
bs_full_simple_c1 <- map_dbl(1:1000, ~ simple_avg(sample.int(nrow(dd_c1), replace=T), dd_c1))
results_full_simple_c1 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_simple_c1, bs_full_simple_c1))

pt_est_full_size_c1 <- total_size_weight(1:nrow(dd_c1),dd_c1)
set.seed(1850159)
bs_full_size_c1 <- map_dbl(1:1000, ~ total_size_weight(sample.int(nrow(dd_c1), replace=T), dd_c1))
results_full_size_c1 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_size_c1, bs_full_size_c1))


dd_c2 <- 
	dd_long[group %in% c("C2","T"), .(news_min = sum(view_mean)/60), by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post)] %>%
	setkey(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post) %>%
	.[, .(est = -diff2(diff2(.SD$news_min))) ,by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c)]

pt_est_full_c2 <- inv_var_weight(1:nrow(dd_c2),dd_c2)
set.seed(1850159)
bs_full_c2 <- map_dbl(1:1000, ~ inv_var_weight(sample.int(nrow(dd_c2), replace=T), dd_c2))
results_full_c2 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_c2, bs_full_c2))

pt_est_full_simple_c2 <- simple_avg(1:nrow(dd_c2),dd_c2)
set.seed(1850159)
bs_full_simple_c2 <- map_dbl(1:1000, ~ simple_avg(sample.int(nrow(dd_c2), replace=T), dd_c2))
results_full_simple_c2 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_simple_c2, bs_full_simple_c2))

pt_est_full_size_c2 <- total_size_weight(1:nrow(dd_c2),dd_c2)
set.seed(1850159)
bs_full_size_c2 <- map_dbl(1:1000, ~ total_size_weight(sample.int(nrow(dd_c2), replace=T), dd_c2))
results_full_size_c2 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_size_c2, bs_full_size_c2))

dd_c12 <- 
	dd_long[group %in% c("C1+C2","T"), .(news_min = sum(view_mean)/60), by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post)] %>%
	setkey(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c, group, pre_post) %>%
	.[, .(est = -diff2(diff2(.SD$news_min))) ,by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12,n_c)]

pt_est_full_c12 <- inv_var_weight(1:nrow(dd_c12),dd_c12)
set.seed(1850159)
bs_full_c12 <- map_dbl(1:1000, ~ inv_var_weight(sample.int(nrow(dd_c12), replace=T), dd_c12))
results_full_c12 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_c12, bs_full_c12))

pt_est_full_simple_c12 <- simple_avg(1:nrow(dd_c12),dd_c12)
set.seed(1850159)
bs_full_simple_c12 <- map_dbl(1:1000, ~ simple_avg(sample.int(nrow(dd_c12), replace=T), dd_c12))
results_full_simple_c12 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_simple_c12, bs_full_simple_c12))

pt_est_full_size_c12 <- total_size_weight(1:nrow(dd_c12),dd_c12)
set.seed(1850159)
bs_full_size_c12 <- map_dbl(1:1000, ~ total_size_weight(sample.int(nrow(dd_c12), replace=T), dd_c12))
results_full_size_c12 <- data.table(replicate = 0:1000, estimate = c(pt_est_full_size_c12, bs_full_size_c12))

quantiles <- c(0.01,0.025,0.975,0.99)

ci_full_c1 <- results_full_c1[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_c2 <- results_full_c2[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_c12 <- results_full_c12[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_size_c1 <- results_full_size_c1[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_size_c2 <- results_full_size_c2[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_size_c12 <- results_full_size_c12[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_simple_c1 <- results_full_simple_c1[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_simple_c2 <- results_full_simple_c2[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_full_simple_c12 <- results_full_simple_c12[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]

ci_full_split <- rbind(
	ci_full_c1[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C1")],
	ci_full_c2[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C2")],
	ci_full_c12[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C3")])
ci_full_split_size <- rbind(
	ci_full_size_c1[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C1")],
	ci_full_size_c2[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C2")],
	ci_full_size_c12[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C3")])
ci_full_split_simple <- rbind(
	ci_full_simple_c1[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C1")],
	ci_full_simple_c2[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C2")],
	ci_full_simple_c12[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="C3")])


### TABLE 1 / C.4 / C.5
y <- rnorm(100)
x <- matrix(rnorm(100*nrow(ci_full_split)), nrow=100)
fake_model <- lm(y ~ x - 1)

dd_bootstrap_cis_split <- stargazer(list(fake_model, fake_model, fake_model),
		  title="Average Effects and Bootstrapped Confidence Intervals, by Control Group Split.",
		  style="apsr",
		  out=paste0(tables_dir, "dd_bootstrap_cis_split_control", suffix, ".tex"),
		  label=paste0("tab:dd_boostrap_cis_split_control", suffix),
		  coef=list(ci_full_split[,point_est], ci_full_split_size[,point_est], ci_full_split_simple[,point_est]), 
		  ci=T,
		  ci.custom=list(ci_full_split[, .(q_025, q_975)] %>% as.matrix,
		  				 ci_full_split_size[, .(q_025, q_975)] %>% as.matrix,
		  				 ci_full_split_simple[, .(q_025, q_975)] %>% as.matrix),
		  star.cutoffs = NA,
		  omit.stat = "all",
		  column.labels = c("Inv. Variance", "Total Devices", "Equal Weight"),
		  dep.var.labels = "Effect (Minutes)",
		  covariate.labels = ci_full_split[,split],
		  notes="\\parbox[t]{0.9\\linewidth}{Table reports average effects, weighted by 1) the geometric mean of treatment and control group size, 2) total devices in treatment and control groups and 3) a simple equally weighted average. Confidence intervals are the central 95\\% interval of 1000 bootstrap replicates within each subgroup. Group C1 is devices in control group which tuned out prior to ad time; C2 is devices in control group which tuned in after ad time; and C3 is devices in control group who tuned out before ad time, and then tuned back in after ad time.}",
		  notes.append=F
		  )

# by days to election

dd_base[,`:=` (days_to_election = as.numeric(mdy("11-06-2012") - date(ad_time_utc)),
	      weeks_to_election = as.numeric(mdy("11-06-2012") - date(ad_time_utc)) %/% 7)]

summary(lm(est ~days_to_election, weights = sqrt(dd_base$n_t*dd_base$n_c), data = dd_base))

pt_est_daily <- dd_base[,.(estimate = inv_var_weight(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

pt_est_daily_simple <- dd_base[,.(estimate = simple_avg(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

pt_est_daily_size <- dd_base[,.(estimate = total_size_weight(1:.N, .SD)),by=.(days_to_election)] %>% 
	setkey(days_to_election) %>% 
	.[,replicate:=0]

set.seed(90483)
bs_daily <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily <- rbind(pt_est_daily, bs_daily) %>%
		setkey(days_to_election, replicate)

set.seed(90483)
bs_daily_simple <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily_simple <- rbind(pt_est_daily_simple, bs_daily_simple) %>%
		setkey(days_to_election, replicate)

set.seed(90483)
bs_daily_size <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(days_to_election)] 

results_daily_size <- rbind(pt_est_daily_size, bs_daily_size) %>%
		setkey(days_to_election, replicate)



# by weeks to election

pt_est_weekly <- dd_base[,.(estimate = inv_var_weight(1:.N, .SD)),by=.(weeks_to_election)] %>% 
	setkey(weeks_to_election) %>% 
	.[,replicate:=0]

pt_est_weekly_simple <- dd_base[,.(estimate = simple_avg(1:.N, .SD)),by=.(weeks_to_election)] %>% 
	setkey(weeks_to_election) %>% 
	.[,replicate:=0]

pt_est_weekly_size <- dd_base[,.(estimate = total_size_weight(1:.N, .SD)),by=.(weeks_to_election)] %>% 
	setkey(weeks_to_election) %>% 
	.[,replicate:=0]

set.seed(872301)
bs_weekly <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(weeks_to_election)] 

results_weekly <- rbind(pt_est_weekly, bs_weekly) %>%
		setkey(weeks_to_election, replicate)


set.seed(872301)
bs_weekly_simple <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(weeks_to_election)] 

results_weekly_simple <- rbind(pt_est_weekly_simple, bs_weekly_simple) %>%
		setkey(weeks_to_election, replicate)


set.seed(872301)
bs_weekly_size <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(weeks_to_election)] 

results_weekly_size <- rbind(pt_est_weekly_size, bs_weekly_size) %>%
		setkey(weeks_to_election, replicate)


# by ad sponsor type
ads <- readRDS(paste0(data_dir, "final_ads.rds"))
dd_base <- dd_base[ads, on=c("dma_code","channel","affiliate","program","ad_time_utc"), nomatch=0]



### TABLE A.1
table(dd_base$ad_type)

    # cand_house      cand_pres    cand_senate cand_statewide        outside 
    #      24664          34257          15438          12954          25167 

pt_est_sponsor <- dd_base[,.(estimate = inv_var_weight(1:.N, .SD)),by=.(ad_type)] %>% 
	setkey(ad_type) %>% 
	.[,replicate:=0]

pt_est_sponsor_simple <- dd_base[,.(estimate = simple_avg(1:.N, .SD)),by=.(ad_type)] %>% 
	setkey(ad_type) %>% 
	.[,replicate:=0]

pt_est_sponsor_size <- dd_base[,.(estimate = total_size_weight(1:.N, .SD)),by=.(ad_type)] %>% 
	setkey(ad_type) %>% 
	.[,replicate:=0]

set.seed(256)
bs_sponsor <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(ad_type)] 

results_sponsor <- rbind(pt_est_sponsor, bs_sponsor) %>%
		setkey(ad_type, replicate)


set.seed(256)
bs_sponsor_simple <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(ad_type)] 

results_sponsor_simple <- rbind(pt_est_sponsor_simple, bs_sponsor_simple) %>%
		setkey(ad_type, replicate)


set.seed(256)
bs_sponsor_size <- dd_base[,.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(ad_type)] 

results_sponsor_size <- rbind(pt_est_sponsor_size, bs_sponsor_size) %>%
		setkey(ad_type, replicate)

# by incumbent / challenger
dd_base[ad_type=="cand_house", incumbent:= as.numeric(distIDCurr == distIDRunFor)]
dd_base[ad_type=="cand_senate",incumbent:= as.numeric(distIDCurr == distIDRunFor)]
dd_base[ad_type=="cand_pres",incumbent:= as.numeric(grepl("OBAMA BARACK FOR PRESIDENT",Advertiser))]
dd_base[ad_type=="cand_statewide",incumbent:= as.numeric(candidate %in% c("dalrymple jack", "kinder peter", "markell jack","nixon jay","tomblin earl"))]


pt_est_inc <- dd_base[!is.na(incumbent),.(estimate = inv_var_weight(1:.N, .SD)),by=.(incumbent)] %>% 
	setkey(incumbent) %>% 
	.[,replicate:=0]

pt_est_inc_simple <- dd_base[!is.na(incumbent),.(estimate = simple_avg(1:.N, .SD)),by=.(incumbent)] %>% 
	setkey(incumbent) %>% 
	.[,replicate:=0]

pt_est_inc_size <- dd_base[!is.na(incumbent),.(estimate = total_size_weight(1:.N, .SD)),by=.(incumbent)] %>% 
	setkey(incumbent) %>% 
	.[,replicate:=0]

set.seed(984)
bs_inc <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(incumbent)] 

results_inc <- rbind(pt_est_inc, bs_inc) %>%
		setkey(incumbent, replicate)

set.seed(984)
bs_inc_simple <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(incumbent)] 

results_inc_simple <- rbind(pt_est_inc_simple, bs_inc_simple) %>%
		setkey(incumbent, replicate)

set.seed(984)
bs_inc_size <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(incumbent)] 

results_inc_size <- rbind(pt_est_inc_size, bs_inc_size) %>%
		setkey(incumbent, replicate)

# by incumbent / challenger X office

pt_est_inc_sponsor <- dd_base[!is.na(incumbent),.(estimate = inv_var_weight(1:.N, .SD)),by=.(incumbent, ad_type)] %>% 
	setkey(ad_type, incumbent) %>% 
	.[,replicate:=0]

pt_est_inc_sponsor_simple <- dd_base[!is.na(incumbent),.(estimate = simple_avg(1:.N, .SD)),by=.(incumbent, ad_type)] %>% 
	setkey(ad_type, incumbent) %>% 
	.[,replicate:=0]

pt_est_inc_sponsor_size <- dd_base[!is.na(incumbent),.(estimate = total_size_weight(1:.N, .SD)),by=.(incumbent, ad_type)] %>% 
	setkey(ad_type, incumbent) %>% 
	.[,replicate:=0]

set.seed(366854)
bs_inc_sponsor <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ inv_var_weight(sample.int(.N, replace=T), .SD))),
				by=.(ad_type, incumbent)] 

results_inc_sponsor <- rbind(pt_est_inc_sponsor, bs_inc_sponsor) %>%
		setkey(ad_type, incumbent, replicate)

set.seed(366854)
bs_inc_sponsor_simple <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ simple_avg(sample.int(.N, replace=T), .SD))),
				by=.(ad_type, incumbent)] 

results_inc_sponsor_simple <- rbind(pt_est_inc_sponsor_simple, bs_inc_sponsor_simple) %>%
		setkey(ad_type, incumbent, replicate)

set.seed(366854)
bs_inc_sponsor_size <- dd_base[!is.na(incumbent),.(replicate = 1:1000, 
				  estimate = map_dbl(1:1000, ~ total_size_weight(sample.int(.N, replace=T), .SD))),
				by=.(ad_type, incumbent)] 

results_inc_sponsor_size <- rbind(pt_est_inc_sponsor_size, bs_inc_sponsor_size) %>%
		setkey(ad_type, incumbent, replicate)


save(results_full,results_daily,results_weekly,results_sponsor,results_inc,results_inc_sponsor,file=paste0(data_dir, "dd_base_bootstraps.RData"))
save(results_full_simple,results_daily_simple,results_weekly_simple,results_sponsor_simple,results_inc_simple,results_inc_sponsor_simple,file=paste0(data_dir, "dd_bootstraps_simple_wgt.RData"))
save(results_full_size,results_daily_size,results_weekly_size,results_sponsor_size,results_inc_size,results_inc_sponsor_size,file=paste0(data_dir, "dd_bootstraps_size_wgt.RData"))

### confidence intervals ###


quantiles <- c(0.01,0.025,0.975,0.99)
sponsor_levels <- c("House", "President", "Senate", "Statewide","Outside Group")
inc_levels <- c("Challenger","Incumbent")

ci_full <- results_full[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily <- results_daily[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]
ci_weekly <- results_weekly[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(weeks_to_election)]
ci_sponsor <- results_sponsor[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(ad_type)]
ci_inc <- results_inc[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(incumbent)]
ci_inc_sponsor <- results_inc_sponsor[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))),, by =.(ad_type, incumbent)]

cis_all <- rbind(
	ci_full[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Full Sample", subgroup="")],
	ci_daily[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Days to Election", subgroup=ci_daily$days_to_election)],
	ci_weekly[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Weeks to Election", subgroup=ci_weekly$weeks_to_election)],
	ci_sponsor[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor", subgroup=sponsor_levels)],
	ci_inc[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Incumbency", subgroup=inc_levels)],
	ci_inc_sponsor[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor x Incumbency", subgroup=paste(rep(sponsor_levels[1:4], times=2), rep(inc_levels, each=4), sep=" "))])


ci_full_simple <- results_full_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily_simple <- results_daily_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]
ci_weekly_simple <- results_weekly_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(weeks_to_election)]
ci_sponsor_simple <- results_sponsor_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(ad_type)]
ci_inc_simple <- results_inc_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(incumbent)]
ci_inc_sponsor_simple <- results_inc_sponsor_simple[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))),, by =.(ad_type, incumbent)]

cis_all_simple <- rbind(
	ci_full_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Full Sample", subgroup="")],
	ci_daily_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Days to Election", subgroup=ci_daily$days_to_election)],
	ci_weekly_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Weeks to Election", subgroup=ci_weekly$weeks_to_election)],
	ci_sponsor_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor", subgroup=sponsor_levels)],
	ci_inc_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Incumbency", subgroup=inc_levels)],
	ci_inc_sponsor_simple[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor x Incumbency", subgroup=paste(rep(sponsor_levels[1:4], times=2), rep(inc_levels, each=4), sep=" "))])

ci_full_size <- results_full_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_")))]
ci_daily_size <- results_daily_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(days_to_election)]
ci_weekly_size <- results_weekly_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(weeks_to_election)]
ci_sponsor_size <- results_sponsor_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(ad_type)]
ci_inc_size <- results_inc_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))), by =.(incumbent)]
ci_inc_sponsor_size <- results_inc_sponsor_size[,c(point_est=estimate[replicate==0], map(quantiles, ~ quantile(estimate[replicate!=0], probs=.)) %>% set_names(paste("q",c("01","025","975","99"), sep="_"))),, by =.(ad_type, incumbent)]

cis_all_size <- rbind(
	ci_full_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Full Sample", subgroup="")],
	ci_daily_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Days to Election", subgroup=ci_daily$days_to_election)],
	ci_weekly_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Weeks to Election", subgroup=ci_weekly$weeks_to_election)],
	ci_sponsor_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor", subgroup=sponsor_levels)],
	ci_inc_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Incumbency", subgroup=inc_levels)],
	ci_inc_sponsor_size[,.(point_est,q_01,q_025,q_975,q_99)][,`:=` (split="Sponsor x Incumbency", subgroup=paste(rep(sponsor_levels[1:4], times=2), rep(inc_levels, each=4), sep=" "))])


saveRDS(cis_all, file=paste0(data_dir, "dd_bootstrap_cis", suffix, ".rds"))
saveRDS(cis_all_simple, file=paste0(data_dir, "dd_bootstrap_cis_simple_wgt", suffix, ".rds"))
saveRDS(cis_all_size, file=paste0(data_dir, "dd_bootstrap_cis_size_wgt", suffix, ".rds"))


### graph for daily estimates ###

### FIGURE 2(B)
daily_graph <- ggplot(aes(x=days_to_election, y=point_est), data=ci_daily[days_to_election>=0]) + 
	geom_pointrange(aes(ymin=q_025, ymax=q_975)) +
	ylab("Treatment Effect (mins)") +
	xlab("Days to Election") +
	geom_hline(yintercept=0, lty="dashed") +
	geom_smooth(aes(x=days_to_election, y=est, weight=sqrt(n_c*n_t)), data = dd_base, method="lm", colour = "grey20") +
	scale_x_reverse(limits=c(60,0)) 

ggsave(daily_graph, height=7, width=10, file=paste0(plot_dir, "dd_daily", suffix, ".pdf"))


### TABLE 2 / C.6 / C.7 ###
### ESTIMATES BY SUBGROUP TABLES ###

cis_to_print <- cis_all[split %in% c("Full Sample", "Sponsor", "Incumbency")]
cis_to_print_simple <- cis_all_simple[split %in% c("Full Sample", "Sponsor", "Incumbency")]
cis_to_print_size <- cis_all_size[split %in% c("Full Sample", "Sponsor", "Incumbency")]

y <- rnorm(100)
x <- matrix(rnorm(100*nrow(cis_to_print)), nrow=100)

fake_model <- lm(y ~ x - 1)



dd_bootstrap_cis <- stargazer(list(fake_model, fake_model, fake_model),
		  title="Average Effects and Bootstrapped Confidence Intervals, by Subgroup.",
		  style="apsr",
		  # out=paste0(tables_dir,"dd_bootstrap_cis", suffix, ".tex"),
		  label=paste0("tab:dd_boostrap_cis",suffix),
		  coef=list(cis_to_print[,point_est], cis_to_print_size[,point_est], cis_to_print_simple[,point_est]), 
		  ci=T,
		  ci.custom=list(cis_to_print[, .(q_025, q_975)] %>% as.matrix,
		  				 cis_to_print_size[, .(q_025, q_975)] %>% as.matrix,
		  				 cis_to_print_simple[, .(q_025, q_975)] %>% as.matrix),
		  star.cutoffs = NA,
		  omit.stat = "all",
		  column.labels = c("Inv. Variance", "Total Devices", "Equal Weight"),
		  dep.var.labels = "Effect (Minutes)",
		  covariate.labels = cis_to_print[,subgroup],
		  notes="\\parbox[t]{0.9\\linewidth}{Table reports average effects, weighted by 1) the geometric mean of treatment and control group size, 2) total devices in treatment and control groups and 3) a simple equally weighted average. Confidence intervals are the central 95\\% interval of 1000 bootstrap replicates within each subgroup.}",
		  notes.append=F
		  )

dd_bootstrap_cis %<>% 
	sub(pattern="lccc\\}", replacement="llccc}") %>%
	sub(pattern="& \\\\multicolumn\\{3\\}", replacement="& Subgroup & \\\\multicolumn{3}") %>%
	sub(pattern="& Inv. Variance", replacement="& & Inv. Variance") %>%
	sub(pattern="& \\(1\\)", replacement="& & (1)") %>%
	sub(pattern="  & ([^(])", replacement="  \\\\multirow{2}{*}{Full Sample} & & \\1") %>%
	sub(pattern="  House &", replacement="  \\\\multirow{10}{*}{Sponsor} & House &") %>%
	sub(pattern="  Challenger &", replacement="  \\\\multirow{4}{*}{Incumbency} & Challenger &") %>%
	sub(pattern="  ([A-Za-z ]+) & ", replacement="  & \\1 & ") %>%
	sub(pattern="  & \\(", replacement="  & & (") %>%
	sub(pattern="multicolumn\\{4\\}",replacement="multicolumn{5}")

dd_bootstrap_cis <- c(
	dd_bootstrap_cis[1:14],
	dd_bootstrap_cis[12],
	dd_bootstrap_cis[15:24],
	dd_bootstrap_cis[12],
	dd_bootstrap_cis[25:32])

cat(dd_bootstrap_cis, file=paste0(tables_dir,"dd_bootstrap_cis", suffix, ".tex"),sep="\n")

### TABLE 2 ###
### HETEROGENEITY BY AUDIENCE CHARACTERISTICS ###
### THIS REQUIRES PROPRIETARY DATA (NOT INCLUDED IN THE ARCHIVE) ###


### THIS FILE IS PROPRIETARY (FROM FWM DATA)
ref <- readRDS("stb/master_dev_list.RData") %>% as.data.table
###

get_ad_audience_avgs <- function(d) {
	t_c_tday <- readRDS(paste0(data_dir, "dd/tc_groups_", as.character(d), ".rds")) %>% 
		unnest(cols=c(t_c)) %>%
		as.data.table

	audience <- t_c_tday[ref[,.(device_id, age, hhinc, college, man, white, black, hispanic)], 
						 on = .(device_id), nomatch=0] %>%
						 .[,map(.SD, mean, na.rm=T),.SDcols = c("age", "hhinc", "college", "man", "white", "black", "hispanic"),by=.(dma_code, channel, affiliate, program, event_time_utc)]

	sizes <- t_c_tday[,.(n_tot = .N, n_t = sum(group == "T"), n_c = sum(group != "T")),by=.(dma_code, channel, affiliate, program, event_time_utc)]

	sizes[audience, on = .(dma_code, channel, affiliate, program, ad_time_utc=event_time_utc)]

}

dates <- seq(mdy("09/01/2012"), mdy("11/05/2012"), by = "days")
audience <- dates %>% map(get_ad_audience_avgs) %>% rbindlist

dd_base_aud <- dd_base[audience, on = .(dma_code, channel, affiliate, program, ad_time_utc)]

dd_base_aud[,age_quart := factor(findInterval(age, quantile(age, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]
dd_base_aud[,hhinc_quart := factor(findInterval(hhinc, quantile(hhinc, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]
dd_base_aud[,college_quart := factor(findInterval(college, quantile(college, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]
dd_base_aud[,man_quart := factor(findInterval(man, quantile(man, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]
dd_base_aud[,black_quart := factor(findInterval(black, quantile(black, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]
dd_base_aud[,hispanic_quart := factor(findInterval(hispanic, quantile(hispanic, probs = seq(0.25, 0.75, 0.25), na.rm=T)))]


dd_base_aud[ad_type == "outside", incumbent := 0]


het_w1 <- feols(est ~ age_quart + hhinc_quart + college_quart + black_quart + hispanic_quart | ad_type + incumbent, data = dd_base_aud, weight = sqrt(dd_base_aud$n_t * dd_base_aud$n_c))
het_w2 <- feols(est ~ age_quart + hhinc_quart + college_quart + black_quart + hispanic_quart | ad_type + incumbent, data = dd_base_aud, weight = dd_base_aud$n_tot)
het_w3 <- feols(est ~ age_quart + hhinc_quart + college_quart + black_quart + hispanic_quart | ad_type + incumbent, data = dd_base_aud)

setFixest_dict(c(
	age_quart1 = "Age (Q2)",
	hhinc_quart1 = "Income (Q2)",
	college_quart1 = "College Grad (Q2)",
	man_quart1 = "Male (Q2)",
	black_quart1 = "Black (Q2)",
	hispanic_quart1 = "Hispanic (Q2)",
	age_quart2 = "Age (Q3)",
	hhinc_quart2 = "Income (Q3)",
	college_quart2 = "College Grad (Q3)",
	man_quart2 = "Male (Q3)",
	black_quart2 = "Black (Q3)",
	hispanic_quart2 = "Hispanic (Q3)",
	age_quart3 = "Age (Q4)",
	hhinc_quart3 = "Income (Q4)",
	college_quart3 = "College Grad (Q4)",
	man_quart3 = "Male (Q4)",
	black_quart3 = "Black (Q4)",
	hispanic_quart3 = "Hispanic (Q4)",
	household_id = "Household",
	est = "DiD Estimate",
	ad_type = "Office",
	incumbent = "Incumbent"))

new_style = list(depvar="title:",
                 model="title:",
                 lines = "top:\\toprule; bottom:\\bottomrule",
                 var = "title:\\midrule", 
                 fixef = "title:\\midrule; suffix: FE;where:var",
                 stats = "title:\\midrule") 

setFixest_etable(fitstat = ~ r2, yesNo = "$\\checkmark$",
                 style = new_style)

etable(list(het_w1, het_w2, het_w3),
	title="Heterogeneity in DiD Estimates by Audience Characteristics",
	file=paste0(tables_dir, "audience_het.tex"),
	replace=T,
	fixef_sizes=T,
	label="tab:audience_het",
	subtitles=c("Inv. Variance", "Total Devices", "Equal Weight"),
	notes="\\parbox[t]{0.7\\linewidth}{An observation is an ad. The dependent variable is the ad-level differences-in-differences estimate of the effect on news consumption. The sample is all households active and tuned in to the channel on which a political ad ran at the time the ad began, i.e. the treatment group from the differences-in-differences analyses. Column (1) weights by the geometric mean of treatment and control group size; column (2) weights by the total number of devices (treatment plus control group size); column (3) is equally weighted. Each audience characteristic is computed as the mean of the variable among all devices in treatment and control groups for a given ad. Regressors are dummies for the ad being in the indicated quartile of the distribution of ad-level means for the indicated characteristic. The first and second quartiles of the ad-level mean for Black and Hispanic are equal (the median is zero for both), hence the Q2 dummy is omitted for these variables.}"
	  )